library(foreign)
set.seed(1234567891)
fnum=10

outnames<- c("res1.dta", "res2.dta", "res3.dta", "res4.dta", "res5.dta", "res6.dta", "res7.dta", "res8.dta", "res9.dta", "res10.dta")

print("START")
print(Sys.time())

alldata<- read.dta("dataset_for_R_1.dta")
attach(alldata)

childsamp<- data.frame(alldata[samp1==1,])
names(childsamp)<-c("samp1", "samp2", "ever.married", "educ.attain", "educ.qtile", "name")
child_educ_demean<- childsamp$educ.attain - mean(childsamp$educ.attain)
temp<- cbind(childsamp, child_educ_demean)
childsamp<- data.frame(temp)
names(childsamp)<- c("samp1", "samp2", "ever.married", "educ.attain", "educ.qtile", "name","educ.attain.demean")

adultsamp<- data.frame(alldata[samp2==1,])
names(adultsamp)<-c("samp1", "samp2", "ever.married", "educ.attain", "educ.qtile", "name")
adult_educ_demean<- adultsamp$educ.attain - mean(adultsamp$educ.attain)
temp<- cbind(adultsamp, adult_educ_demean)
adultsamp<- data.frame(temp)
names(adultsamp)<- c("samp1", "samp2", "ever.married", "educ.attain", "educ.qtile", "name","educ.attain.demean")

N_child<- nrow(childsamp)
N_adult<- nrow(adultsamp)

nreps<- 5
rho<-0
nnames <- 5000

RES <- matrix(data=0, nrow=1, 17) 

rsq<- function(y, i) {
	orig<- data.frame(matrix(c(i,y), nrow=length(y), ncol=2))
	names(orig)<- (c("i", "y"))
	new<- aggregate(y, list(i), FUN="mean")
	names(new)<- c("i.new", "ybar")
	combo<- merge(orig, new, by.x="i", by.y="i.new")
	reg<- lm(combo$y ~ combo$ybar)
	err<- sum(reg$res^2)
	tot<- sum((y-mean(y))^2)
	r<- 1-(err/tot)
	return(r)
	}



name.assign<- function(X, N) {
	
		#inialize name matrix
		name<- matrix(data=0, nrow=N, ncol=1)
	
		x <- matrix(data=c(rep(1, N), X), nrow=N, ncol=2) 
		p<- matrix(data=exp(x%*% delta_mat) / (rowSums(exp(x %*% delta_mat))), nrow=N, ncol=N_names)
		cu.p<- matrix(data=rep(0, length(p)), nrow=N, ncol=N_names)
		for (j in 2:N_names) cu.p[,j]<-cu.p[,j-1]+p[,j] 
		name_temp<- matrix(data=0, nrow=N, ncol=N_names)
		namevec<- matrix(data=runif(N), nrow=N, ncol=1)
		name_temp[,N_names]<- (cu.p[,N_names]<namevec)
		n<- N_names-1
		for (j in 1:n) {
				name_temp[,j]<- (cu.p[,j]<namevec & cu.p[,j+1]>=namevec)
			}
			name_temp_2<- matrix(data=c(1:N_names), nrow=N_names, ncol=1)
			name<- name_temp%*%name_temp_2
		return(name)
	}
	
for (N_names in nnames) {
	
	for (sigma2_con in c(0.5, 1, 2.5, 5, 10)) {
		
		for(sigma2_ses in c(0, 0.005, 0.01, 0.015, 0.02, 0.025, 0.05, 0.1, 0.15, 0.25, 1)) {
		
		for (rep in c(1:nreps)) {

delta_con <- sqrt(sigma2_con)*rnorm(N_names)
delta_ses <- (rho*sigma2_ses/sigma2_con)*delta_con + sqrt(sigma2_ses/(1-rho^2))*rnorm(N_names)
delta_mat<- t(cbind(delta_con, delta_ses))

names_child<- name.assign(childsamp$educ.attain.demean, nrow(childsamp))
names_adult<- name.assign(adultsamp$educ.attain.demean, nrow(adultsamp))


childsamp_named<- data.frame(cbind(childsamp, names_child))
names(childsamp_named)<- c(names(childsamp), "assigned.name")
adultsamp_named<- data.frame(cbind(adultsamp, names_adult))
names(adultsamp_named)<- c(names(adultsamp), "assigned.name")

child_means<- aggregate(childsamp_named$educ.attain, list(childsamp_named$assigned.name), FUN=mean)
child_means<- data.frame(child_means)
names(child_means) <- c("assigned.name", "educ.pseudo")

merged_adult<- merge(adultsamp_named, child_means, by.x="assigned.name", by.y="assigned.name")
		
name_counter<- data.frame(cbind(adultsamp_named$assigned.name, rep(1, nrow(adultsamp_named))))

names(name_counter)<- c("assigned.name", "ones")	
namecounts<- aggregate(name_counter$ones, list(name_counter$assigned.name), FUN=sum)	
namecounts<- data.frame(namecounts)
names(namecounts)<- c("assigned.name", "count")

sortname<- sort(namecounts$count, decreasing=TRUE)
num<- sum(sortname[1:50])
denom<- sum(sortname)

n_names_linked<- length(sortname)

share50 <- num/denom
		
### Record: (1) r-squared (2) share top 50	(3) regression of married on pseudo education (4) variance of pseudo education	
merged_adult_clean<- data.frame(na.omit(merged_adult))
names(merged_adult_clean)<- names(merged_adult)

n_adult_linked<- nrow(merged_adult_clean)

q<- quantile(merged_adult_clean$educ.pseudo)
q1<- merged_adult_clean$educ.pseudo < q[2]
n_q1<- sum(q1)
q2<- merged_adult_clean$educ.pseudo < q[3] & merged_adult_clean$educ.pseudo >=q[2]
n_q2<- sum(q2)
q3<- merged_adult_clean$educ.pseudo < q[4] & merged_adult_clean$educ.pseudo >=q[3]
n_q3<- sum(q3)
q4<- merged_adult_clean$educ.pseudo >=q[4]
n_q4<- sum(q4)

merged_adult_quartile<- matrix(data=cbind(merged_adult_clean$ever.married, q1, q2, q3, q4), nrow=nrow(merged_adult_clean), ncol=5)
merged_adult_quartile<- data.frame(merged_adult_quartile)
names(merged_adult_quartile)<- c("ever.married", "q1", "q2", "q3", "q4")

pseudo.reg<- lm(ever.married ~ educ.pseudo, data=merged_adult_clean)
beta.pseudo<- pseudo.reg$coefficients[2]

var.pseudo<- var(merged_adult_clean$educ.pseudo)

qtile.reg<- lm(ever.married ~ q2 + q3 + q4, data=merged_adult_quartile)
beta.q2<- qtile.reg$coefficients[2]
beta.q3<- qtile.reg$coefficients[3]
beta.q4<- qtile.reg$coefficients[4]

rsq.val <- rsq(childsamp_named$educ.attain, childsamp_named$assigned.name)

newrow<- matrix(data=c(fnum, rep, sigma2_con, sigma2_ses, beta.pseudo, var.pseudo, rsq.val, share50, n_names_linked, N_adult, n_adult_linked, beta.q2, beta.q3, beta.q4, n_q2, n_q3, n_q4), nrow=1, ncol=17)

print(newrow)

RES<- rbind(RES, newrow)

print("sigma2_con=")
print(sigma2_con)
print("sigma2_ses=")
print(sigma2_ses)
print("interation #")
print(rep)
print("ENDS at")
print(Sys.time())		

			} #closes reps	
		}	#closes sigma2_ses		
	}	#closes sigma2_con	
}	#closes N_names

RES<- RES[-1,]
RES.data<- data.frame(RES)

names(RES.data)<- c("file_num", "iteration", "sigma2_con", "sigma2_ses", "beta_pseudo", "var_pseudo", "rsquared", "share50", "n_names_linked", "n_adult", "n_adult_linked", "beta_q2", "beta_q3", "beta_q4", "n_q2", "n_q3", "n_q4")

write.dta(RES.data, file=outnames[fnum])
